'''
Created on Jul 16, 2012

@author: xding
'''

import random
from shortcuts import log_info
from shortestpath import dijkstra
from networks import connected_components, steiner


def graph_hubs(graph, source_nodes, destination_nodes):
    ''' graph_hierarchy level 2
        finding hubs: a set of nodes covering a certain number of shortest paths between a set of nodes
        @return: a list of hub nodes and a list of paths
    '''
    
    def rand_pop(l):
        i = random.randint(0, len(l) - 1)
        l[i], l[-1] = l[-1], l[i]
        return l.pop()
    
    components = connected_components(graph)
    components = [item for item in components if len(item) > 2]
    if len(components) > 1:
        log_info("%d connect components (size > 2) found" % len(components))
        hub_nodes = []
        highway_arcs = []
        for sub_grpah in components:
            sub_hub_nodes, sub_highway_arcs = graph_hubs(sub_grpah, source_nodes, destination_nodes)
            hub_nodes.extend(sub_hub_nodes)
            highway_arcs.extend(sub_highway_arcs)
        log_info("Totally %d hub nodes and %d highway arcs found" % (len(hub_nodes), len(highway_arcs)))
        return hub_nodes, highway_arcs
    
    graph_size = len(graph)
    max_num_of_paths = 3 * len(graph) / 2
    log_info("connect graph size %d, around %d paths to cover" % (graph_size, max_num_of_paths))
    
    path_list = []
    search_space = set()
    for x in source_nodes:
        for y in destination_nodes:
            if x != y and x in graph and y in graph and x not in graph[y] and y not in graph[x]:
                search_space.add((x, y))
                search_space.add((y, x))
    
    while len(search_space) > 0 and max_num_of_paths > 0:
        max_num_of_paths -= 1
#        if max_num_of_paths % 100 == 0:
#            log_info(" - %d paths left" % max_num_of_paths)
        (dep, arr) = search_space.pop()
        path, _cost = dijkstra(graph, dep, arr)
        path_list.append(path)
        for k in range(2, len(path)):
            for (x, y) in zip(path[:-k], path[k:]):
                search_space -= {(x, y)}
    
    """ recalculate incident paths,
        now we remove the source and the destination from each path
        and only interested by the first 1/2 of in the set
    """
    path_list = sorted(path_list, key = lambda x : -len(x))
    #path_list = path_list[:len(path_list)/2+1]
    incident_paths = {node: set() for node in graph}
    for i,path in enumerate(path_list):
        for node in path[1:-1]:
            incident_paths[node].add(i)
    
    #find hubs convering half of the paths found
    hub_nodes = []
    while True:
        hub = max(incident_paths, key = lambda x : len(incident_paths[x]))
        if len(incident_paths[hub]) == 0:
            break
        hub_nodes.append(hub)
        paths_to_remove = list(incident_paths[hub])
        for path in paths_to_remove:
            for node in incident_paths:
                incident_paths[node] -= {path}
    log_info("%d hub nodes found" % len(hub_nodes))
    
    #calculate highway links
    highway_arcs = steiner(graph, hub_nodes)
    log_info("%d highway arcs found" % len(highway_arcs))
    
    return hub_nodes, highway_arcs

